/*******************************************************************************

This code file cleans the Section 8, LIHTC, and NYCHA data for comparison to 421-a units.

*******************************************************************************/

*** Manage settings

	run "~/Dropbox (MIT)/Research/NYC421a/code/modules/settings.do"
	
******************************** Project-Based Section 8 *************************************
	
*** Load data

	* Set working directory
	cd "$data/raw/OtherHousingAssistance/Section8"

	import excel "activeportfoliopropdata.xls", sheet("Step_01_Property_Level_data") firstrow clear

*** Restrict sample to Section 8 in NYC counties

	keep if state_code == "NY"
	
	keep if county_code == "047" | county_code == "081" | county_code == "085" | county_code == "005" | county_code == "061"
	
	keep if is_sec8_ind == "Y"
	
*** Clean data

	* Drop unneeded variables
	drop 	hub_name_text servicing_site_name_text address_line2_text ///
			state_code state_name_text zip4_code county_code ///
			county_name_text msa_code msa_name_text congressional_district_code ///
			placed_base_city_name_text property_category_name is_under_management_ind ///
			is_hospital_ind is_nursing_home_ind is_board_and_care_ind ///
			is_assisted_living_ind is_hud_held_ind is_hud_owned_ind ///
			is_refinanced_ind is_insured_ind is_mip_ind is_non_insured_ind ///
			is_co_insured_ind is_subsidized_ind is_202_811_ind is_221d3_ind ///
			is_221d4_ind is_236_ind is_section_236_rap_ind is_bmir_ind ///
			is_rent_supplement_ind has_active_assistance_ind is_risk_sharing_ind ///
			has_active_irp_ind has_use_restriction_ind has_service_agreement_ind ///
			is_pac_ind is_prac_ind is_sec8_202_ind is_sec8_state_agency_hfa_ind ///
			is_sec8_lmsa_ind is_sec8_prprty_disposition_ind is_sec8_preservation_ind ///
			is_sec8_othr_nw_cnstrction_ind is_sec8_other_sub_rehab_ind is_sec8_ind ///
	
	* Format
	format property_id %18.0f
	rename address_line1_text address
	
	tempfile tmp
	save `tmp', replace
	
*** Merge in FIPS codes

	import delimited "addresses_geocoded.csv", encoding(ISO-8859-1) clear
	merge 1:1 address using `tmp', nogen
	
*** Back to cleaning

	drop n state zip city property_id property_name_text city_name_text occupancy_date
	
	destring latitude longitude state_fips county_fips tract_fips block_fips zip_code, force replace
	
	rename total_assisted_unit_count assisted_unit_count
		
	rename tract_fips ct2010
	rename block_fips cb2010
	
	gen bldg_type = 1
	
*** Save dataset

	tempfile section8_units
	save `section8_units', replace
	
********************************** LIHTC ***************************************

*** Load data
	
	* Set working directory
	cd "$data/raw/OtherHousingAssistance/LIHTC"

	import delimited "$data/raw/OtherHousingAssistance/LIHTC/LIHTCPUB.csv", encoding(ISO-8859-1) clear
	
*** Restrict sample to LIHTC in NYC counties

	keep if proj_st == "NY"
	
	gen cnty = cnty2010
	replace cnty = cnty2000 if missing(cnty)
	replace cnty = cnty1990 if missing(cnty)
	
	keep if cnty == 47 | cnty == 81 | cnty == 85 | cnty == 5 | cnty == 61

	* Keep relevant variables
	keep project proj_add proj_cty proj_zip latitude longitude n_units li_units
	
*** Merge in FIPS codes

	* Create merge key
	gen n = _n
	
	* Save in tempfile
	tempfile tmp
	save `tmp', replace
	
	* Merge on key
	import delimited "addresses_geocoded.csv", encoding(ISO-8859-1) clear
	merge 1:1 n using `tmp', nogen
	
*** Back to cleaning 
	
	drop n
	destring proj_zip, replace
	
	rename li_units assisted_unit_count 
	rename n_units total_unit_count
	rename proj_zip zip_code
	rename tract_fips ct2010
	rename block_fips cb2010
	
	gen bldg_type = 2
	
*** Save dataset

	tempfile LIHTC_units
	save `LIHTC_units', replace
	
********************************** NYCHA ***************************************

*** Load data
	
	* Set working directory
	cd "$data/raw/OtherHousingAssistance/NYCHA"
	
	import delimited "NYCHA_Development_Data_Book.csv", encoding(ISO-8859-1) clear
	
*** Clean data

	* Drop unused variables
	keep development tds totalnumberofapartments

	* Clean identifier
	destring tds, replace force
	drop if missing(tds)
	
	* Rename variables
	rename development address
	rename totalnumberofapartments assisted_unit_count
	
	* Clean number of units
	replace assisted_unit_count = subinstr(assisted_unit_count,",","",.)
	destring assisted_unit_count, replace
	
	gen total_unit_count = assisted_unit_count 
	gen bldg_type = 3
	
*** Merge in geodata

	merge 1:1 tds using "NYCHA_Developments/nycha_geodata.dta", nogen keep(3)
	
*** Save data

	tempfile NYCHA_units
	save `NYCHA_units', replace

********************************* Combine datasets *****************************

*** Merge together Section 8, LIHTC, and NYCHA datasets
	
	use `LIHTC_units', clear
	append using `section8_units'
	append using `NYCHA_units'
	
	gen borough = .
	replace borough = 1 if county_fips == 61
	replace borough = 2 if county_fips == 5
	replace borough = 3 if county_fips == 47
	replace borough = 4 if county_fips == 81 
	replace borough = 5 if county_fips == 85
	
	label define borough_label 1 "Manhattan" 2 "Bronx" 3 "Brooklyn" 4 "Queens" 5 "Staten Island"
	label values borough borough_label
	
	label define bldg_type_label 1 "Section 8" 2 "LIHTC" 3 "NYCHA"
	label values bldg_type bldg_type_label
	
*** Merge in Census block level characteristics

	merge m:1 borough ct2010 cb2010 using "$data/clean/census_area_characteristics.dta", nogen keep(1 3)
	
*** Merge in NTAs

	merge m:1 ct2010 borough using "$data/clean/ct2010_to_nta.dta", nogen keep(1 3)

*** Save dataset

	save "$data/clean/non_421a_units.dta", replace
